Descriptive Information

Load initial script

Code
source("0_load_data.R")

Table describing each variable

Code
# Look up the study wave of a variable from the first letter of its name.
#
# var_name: character vector of variable names.
# Returns a character vector of wave labels, named by the prefix letter that
# was looked up. A first letter with no entry below yields NA, so variables
# that do not follow the prefix convention need a manual override by the caller.
get_study_wave = function(var_name) {
  wave_letters = c(
    "a", "b", "c", "d", "e",
    "g", "h", "i", "j", "l",
    "n", "p", "r", "u", "z"
  )
  wave_labels = c(
    "1st Contact", "2 Year", "3 Year", "4 Year", "In Home",
    "7 Year", "8 Year", "9 Year", "10 Year", "12 Year",
    "14 Year", "16 Year", "18 Year", "21 Year", "26 Year"
  )
  wave_lookup = setNames(wave_labels, wave_letters)

  first_letter = substr(var_name, 1, 1)
  wave_lookup[first_letter]
}

# Tidy raw variable labels for display in the variable-description table.
#
# x: character vector of variable labels.
# Returns a character vector of the same length in which
#   - "see value labels" notes are removed,
#   - a trailing value range such as ", 0-22" is removed,
#   - selected abbreviations are spelled out,
#   - leading/trailing/repeated whitespace is collapsed.
clean_description = function(x) {
  x %>%
    # Remove "see value labels" FIRST: when it follows the value range
    # (e.g. "..., 1-11, see value labels") the range only becomes the end of
    # the string once this note is gone
    str_remove(", see value labels") %>%
    str_remove("see value labels") %>%
    # Trim before anchoring on `$`, so leftover trailing spaces cannot hide
    # the range from the pattern below
    str_squish() %>%
    # Remove range info at end (e.g., ", 0-22", ", 1-9", ", 4-11")
    str_remove(", \\d+-\\d+$") %>%
    # Spell out abbreviations inline (first occurrence only, except " qnr")
    str_replace("SOC employment", "Standard Occupational Classification employment") %>%
    str_replace("MHQ\\)", "Mental Health Questionnaire)") %>%
    str_replace("SLQ\\)", "questionnaire)") %>%
    str_replace("G-game", "G-game (general cognitive ability)") %>%
    str_replace_all(" qnr", " questionnaire") %>%
    # Clean up extra whitespace
    str_squish()
}

# Summarise one column of `data` for the "Range or Level" column.
#
# var:  name of the column to summarise (single string)
# data: data frame holding the column
# Returns one string:
#   - numeric column: "min — max", each rounded to 2 decimals, NAs ignored
#   - factor column:  its levels joined by ", ", with "*" appended to the first
#                     level (presumably marking the reference level — confirm)
#   - anything else:  the distinct values joined by ", "
describe_range_or_level = function(var, data) {
  column = data[[var]]
  if (is.numeric(column)) {
    # is.numeric() also covers integer columns and always gives a single
    # TRUE/FALSE; `class(x) == "numeric"` does neither (class() can return
    # several elements, which makes `if` fail in R >= 4.2)
    paste0(round(min(column, na.rm = TRUE), 2), " — ", round(max(column, na.rm = TRUE), 2))
  } else if (is.factor(column)) {
    factor_levels = levels(column)
    paste(c(paste0(factor_levels[1], "*"), factor_levels[-1]), collapse = ", ")
  } else {
    paste(unique(column), collapse = ", ")
  }
}

# Build one section of the variable-description table.
#
# codes:        character vector of variable names in `data`
# short_labels: short display label per variable (same length as `codes`)
# descriptions: long description per variable (same length as `codes`)
# data:         analysis data frame used for ranges/levels and N (default `df`)
# raw_data:     data frame whose column names decide the "*" suffix on the
#               code (default `df0`): codes that are not columns of `raw_data`
#               are shown with a trailing "*"
# Returns a data.frame with columns Short.Label, Description, Teds.Code,
# Range.or.Level, N (number of non-missing values) and Study.Wave. The dotted
# names come from data.frame()'s name checking and are relied on further down.
build_variable_table = function(codes, short_labels, descriptions, data = df, raw_data = df0) {
  data.frame(
    `Short Label` = short_labels,
    Description = descriptions,
    `Teds Code` = ifelse(codes %in% colnames(raw_data), codes, paste0(codes, "*")),
    `Range or Level` = vapply(codes, describe_range_or_level, character(1), data = data),
    N = vapply(codes, function(var) sum(!is.na(data[[var]])), integer(1)),
    `Study Wave` = vapply(codes, get_study_wave, character(1), USE.NAMES = FALSE)
  )
}

# Prefix a table section with its grouping label and a running row number.
#
# tbl:    one section built by build_variable_table()
# group:  value stored in `row_group` (used later to define gt row groups)
# offset: number of rows in all preceding sections
add_row_index = function(tbl, group, offset) {
  cbind(
    row_group = rep(group, nrow(tbl)),
    row_id = offset + seq_len(nrow(tbl)),
    tbl
  )
}

v_rq1x = build_variable_table(rq1x, rq1x_labels_clean, rq1x_labels)

v_rq1x$Description = str_remove(v_rq1x$Description, "\\(1st.*")
v_rq1x$Description = clean_description(v_rq1x$Description)

# Manual study-wave overrides for codes whose first letter is not a wave prefix
# NOTE(review): codes outside the prefix convention that are not listed here
# keep whatever get_study_wave() returns for their first letter (NA for
# unmapped letters, "26 Year" for any code starting with "z") — confirm that
# no further overrides are needed
first_contact_codes = c("cens01pop98density", "pollution1998pca")
v_rq1x$`Study.Wave`[v_rq1x$`Teds.Code` %in% first_contact_codes] = "1st Contact"

# Clean uniform descriptions for participation indicator variables
rq1y_twin_descriptions = c(
  "Parent-report twin booklet data present",
  "Web test data present",
  "Self-report questionnaire data present",
  "Self-report behaviour booklet data present",
  "Self-report questionnaire data present",
  "TEDS21 phase 1 self-report questionnaire data present",
  "TEDS26 Mental Health Questionnaire data present",
  "CATSLife web test data present"
)

# The study wave is derived from `rq1y_twin1`, the same vector that keys every
# other column of this section, so all columns are guaranteed to be row-aligned
v_rq1y = build_variable_table(rq1y_twin1, rq1y_twin_labels_clean, rq1y_twin_descriptions)

v_rq2 = build_variable_table(rq2y, rq2y_labels_short, rq2y_labels)

v_rq2$Description = str_remove(v_rq2$Description, "\\(2.*")

v_rq3 = build_variable_table(rq6y, rq6y_labels, clean_description(var_to_label(rq6y)))

v_rq5 = build_variable_table(rq5y, rq5y_labels_short, clean_description(var_to_label(rq5y)))

# Combine all v_rq dataframes with row indices for grouping
# row_offsets[k] = number of rows in the sections before section k
row_offsets = cumsum(c(0L, nrow(v_rq1x), nrow(v_rq1y), nrow(v_rq2), nrow(v_rq3)))

v_rq1x_indexed = add_row_index(v_rq1x, "RQ1 Variables", row_offsets[1])
v_rq1y_indexed = add_row_index(v_rq1y, "RQ1 Outcome Variables", row_offsets[2])
v_rq2_indexed = add_row_index(v_rq2, "RQ2 Variables", row_offsets[3])
v_rq3_indexed = add_row_index(v_rq3, "RQ3 Variables", row_offsets[4])
v_rq5_indexed = add_row_index(v_rq5, "RQ5 Variables", row_offsets[5])

v_rq_combined = rbind(v_rq1x_indexed, v_rq1y_indexed, v_rq2_indexed, v_rq3_indexed, v_rq5_indexed)

# Typography shared by body cells, column labels and row-group headings
table_text_style = cell_text(font = "Times New Roman", size = px(12))

# Abbreviation key shown underneath the table
abbreviation_note = "Note. MFQ = Mood and Feelings Questionnaire; SDQ = Strengths and Difficulties Questionnaire; GCSE = General Certificate of Secondary Education; KS3 = Key Stage 3; GAD-D = Generalized Anxiety Disorder scale; PARCA = Parent Report of Children's Abilities; MZ = Monozygotic; DZ = Dizygotic; HNC = Higher National Certificate; HND = Higher National Diploma; CSE = Certificate of Secondary Education."

# Row groups are added from the last section (E) to the first (A): in the
# rendered table they appear in the reverse of the order they are added here
v_rq_combined %>%
  gt() %>%
  tab_row_group(
    label = "E) RQ4 Outcome Variables",
    rows = row_group == "RQ5 Variables"
  ) %>%
  tab_row_group(
    label = "D) RQ3 Outcome Variables",
    rows = row_group == "RQ3 Variables"
  ) %>%
  tab_row_group(
    label = "C) RQ2 Outcome Variables",
    rows = row_group == "RQ2 Variables"
  ) %>%
  tab_row_group(
    label = "B) RQ1-2 Participation Indicators (1 = yes, 0 = no)",
    rows = row_group == "RQ1 Outcome Variables"
  ) %>%
  tab_row_group(
    label = "A) Baseline variables used to predict later participation, and for IP weighting",
    rows = row_group == "RQ1 Variables"
  ) %>%
  # The helper columns only exist to define the groups above
  cols_hide(c(row_group, row_id)) %>%
  cols_width(`Range.or.Level` ~ px(100)) %>%
  # data.frame() turned the spaces in the column names into dots; undo that
  # for the displayed labels
  cols_label_with(fn = function(column_name) gsub("\\.", " ", column_name)) %>%
  tab_style(
    style = table_text_style,
    locations = list(cells_body(), cells_column_labels(), cells_row_groups())
  ) %>%
  tab_footnote(footnote = abbreviation_note)
Short Label Description Teds Code Range or Level N Study Wave
A) Baseline variables used to predict later participation, and for IP weighting
Twin Sex Twin sex, 0female 1male sex1 0 — 1 26040 NA
Mother age at birth Age in years of natural mother at time of birth of twins amumagetw 14 — 45 25563 1st Contact
Father age at birth Age in years of natural father at time of birth of twins adadagetw 16 — 66 22975 1st Contact
Single Parent Single Parent asingle cohabiting biological mother and father / cohabiting biological parent with other*, single parent 25587 1st Contact
Zygosity Twin pair zygosity (best available estimate), 1MZ 2DZ zygos 1*, 2 26040 26 Year
Mother medical risk Mother medical risk factor composite scale amedtot -2.49 — 4.98 25864 1st Contact
Father employment level Father Standard Occupational Classification employment level afasoc2* 1*, 2, 3, 4, 5, 6, 7, 8, 9, caring for children at home, no job 23111 1st Contact
Father education level Male parent highest qualification level afahqual CSE grade 1 or O-level/GCSE grade A-C*, no qualifications, CSE grade 2-5 or O-level/GCSE grade D-G, A-level or S-level, HNC, HND, undergraduate degree, postgraduate qualification 23049 1st Contact
Mother employment level Mother Standard Occupational Classification employment level amosoc2* caring for children at home*, 1, 2, 3, 4, 5, 6, 7, 8, 9, no job 25650 1st Contact
Mother education level Female parent highest qualification level amohqual no qualifications*, CSE grade 2-5 or O-level/GCSE grade D-G, CSE grade 1 or O-level/GCSE grade A-C, A-level or S-level, HNC, HND, undergraduate degree, postgraduate qualification 25629 1st Contact
Twin medical risk Twin medical risk factor composite scale atwmed1 -1.88 — 3.33 25832 1st Contact
Ethnic origin Ethnic origin of twins, original codes aethnicc White*, Asian, Black, Mixed race, Other 25946 1st Contact
Language at home Main language spoken at home alang other*, English, English + other 25678 1st Contact
Older siblings Number of older siblings anoldsib 0*, 1, 2, 3, 4, 5 or more 26040 1st Contact
Younger siblings Number of younger siblings anyngsib 0*, 1, 2 or more 26040 1st Contact
Twins club member Member of a Twins Club atwclub 0*, 1 25148 1st Contact
Childcare by others Twins looked after by anyone else alookels 0*, 1 24791 1st Contact
Smoking in pregnancy Smoked cigarettes while pregnant asmoke 0*, 1 25932 1st Contact
Alcohol in pregnancy Drank alcohol while pregnant adrink 0*, 1 25783 1st Contact
Severe stress in pregnancy Severe stress during pregnancy astress 0*, 1 25889 1st Contact
Pollution index Principal Component of 1998 pollution variables pollution1998pca -2.47 — 4.57 23030 1st Contact
B) RQ1-2 Participation Indicators (1 = yes, 0 = no)
Y4 (parent-report twin booklet) Parent-report twin booklet data present dtwdata1 0 — 1 26040 4 Year
Y12 (web tests) Web test data present lcwdata1 0 — 1 26040 12 Year
Y12 (questionnaire) Self-report questionnaire data present lcqdata1 0 — 1 26040 12 Year
Y16 (behaviour booklet) Self-report behaviour booklet data present pcbhdata1 0 — 1 26040 16 Year
Y18 (questionnaire) Self-report questionnaire data present rcqdata1 0 — 1 26040 18 Year
Y21 (TEDS21 phase-1 questionnaire) TEDS21 phase 1 self-report questionnaire data present u1cdata1 0 — 1 26040 21 Year
Y26 (TEDS26 questionnaire) TEDS26 Mental Health Questionnaire data present zmhdata1 0 — 1 26040 26 Year
Y26 (CATSLife web tests) CATSLife web test data present zcdata1 0 — 1 26040 26 Year
C) RQ2 Outcome Variables
Maternal Education Maternal Education (formatted as numeric variable) amohqualn1 1 — 8 25629 1st Contact
Vocabulary Vocabulary total score bvocab1 0 — 100 11830 2 Year
Grammar Grammar composite score bgramma1 0 — 2 11783 2 Year
Parent-admin cognition Parent-administered Parca mean score badparn1 -3.09 — 3.45 11762 2 Year
Parent-report cognition Parent-reported Parca total score breparc1 0 — 26 11872 2 Year
Conduct problems Conduct SDQ-comparable Behar subscale bsdqccont1 0 — 8 11815 2 Year
Emotional problems Emotion SDQ-comparable Behar subscale bsdqcemot1 0 — 4 11823 2 Year
Hyperactivity Hyperactivity SDQ-comparable Behar subscale bsdqchypt1 0 — 6 11836 2 Year
Peer problems Peer SDQ-comparable Behar subscale bsdqcpert1 0 — 6 11565 2 Year
Prosocial behavior Prosocial SDQ-comparable Behar subscale bsdqcprot1 0 — 10 11729 2 Year
D) RQ3 Outcome Variables
Y12: Depression (MFQ) MFQ scale from 11 MFQ items (child self-report) at 12 lcmfqt1 0 — 22 11432 12 Year
Y12: Externalising SDQ Externalising scale at 12 lsdqext1 0 — 20 11389 12 Year
Y12: Cognitive ability G composite scale from child web tests at 12, standardised lcg1 -3.67 — 3.04 8458 12 Year
Y16: GCSE core subjects grade Core subjects (English, maths, science): mean grade in GCSE results (twin exams at 16) pcexgcsecoregrdm1 4 — 11 12982 16 Year
E) RQ4 Outcome Variables
Y12: Cognitive ability G composite scale from child web tests at 12, standardised lcg1 -3.67 — 3.04 8458 12 Year
Y14: Cognitive ability G composite scale from child web tests at 14, standardised ncg1 -4.12 — 3.17 5341 14 Year
Y16: Cognitive ability G composite scale from child web tests at 16, standardised pcg1 -2.86 — 4.06 4767 16 Year
Y21: G-game total score G-game (general cognitive ability) overall total score ucgt1 3 — 40 4549 21 Year
Y14: KS3 academic achievement End of KS3 all-subject Academic achievement mean level (from parent questionnaire) npks3tall1 1 — 9 5436 14 Year
Y16: GCSE core subjects grade Core subjects (English, maths, science): mean grade in GCSE results (twin exams at 16) pcexgcsecoregrdm1 4 — 11 12982 16 Year
Y21: Highest qualification Twin probable highest level of qualification including current study (TEDS21 phase 1 twin questionnaire), 1-11 u1chqualp1 1 — 11 8912 21 Year
Y26: Highest qualification Demographics item: highest qualification ordinal level (TEDS26 twin Mental Health Questionnaire) zmhhqual1 1 — 11 8243 26 Year
Y12: Depression (MFQ) MFQ scale from 11 MFQ items (child self-report) at 12 lcmfqt1 0 — 22 11432 12 Year
Y16: Depression (MFQ) MFQ total scale (child behaviour questionnaire at 16) pcbhmfqt1 0 — 26 9906 16 Year
Y21: Depression (MFQ) MFQ overall total score (TEDS21 phase 1 twin questionnaire) u1cmfqt1 0 — 16 9204 21 Year
Y26: Depression (MFQ) MFQ overall total score (TEDS26 twin Mental Health Questionnaire) zmhmfqt1 0 — 26 8306 26 Year
Y21: Anxiety (GAD-D) General Anxiety overall total score (TEDS21 phase 2 twin questionnaire) u2cganxt1 0 — 40 8236 21 Year
Y26: Anxiety (GAD-D) GAD-D (General Anxiety) overall total score (TEDS26 twin Mental Health Questionnaire) zmhganxt1 0 — 40 8022 26 Year
Y12: Externalising SDQ Externalising scale at 12 lsdqext1 0 — 20 11389 12 Year
Y16: Externalising SDQ Externalising scale at 16 psdqext1 0 — 20 9889 16 Year
Y21: Externalising SDQ Externalising scale at 21 usdqext1 0 — 19 9210 21 Year
Y26: Externalising SDQ Externalising scale at 26 zsdqext1 0 — 19 7718 26 Year
Note. MFQ = Mood and Feelings Questionnaire; SDQ = Strengths and Difficulties Questionnaire; GCSE = General Certificate of Secondary Education; KS3 = Key Stage 3; GAD-D = Generalized Anxiety Disorder scale; PARCA = Parent Report of Children's Abilities; MZ = Monozygotic; DZ = Dizygotic; HNC = Higher National Certificate; HND = Higher National Diploma; CSE = Certificate of Secondary Education.

Descriptive stats on specific variables

Age of twins at each time point

View full-size plot

Code
# Columns whose name contains "age", excluding the "genpro", "mumage" and
# "dadage" variables and every column whose name ends in "2"
df_age = df %>%
  select(contains("age")) %>%
  select(-contains("genpro"), -contains("mumage"), -contains("dadage"), -ends_with("2"))

# Look the display labels up once: they rename the columns below and also fix
# the order of the facets
age_labels = var_to_label(colnames(df_age))

# One row per (age variable, value) pair; `name` holds the display label
df_age_long = df_age %>%
  `colnames<-`(age_labels) %>%
  pivot_longer(cols = everything())

# Histogram of ages, one panel per age variable in original column order
df_age_long %>%
  mutate(name = factor(name, levels = age_labels)) %>%
  ggplot(aes(x = value)) +
  # Missing ages are dropped silently rather than with a warning on each
  # render (presumably they are participants without data at that time
  # point — confirm)
  geom_histogram(bins = 100, na.rm = TRUE) +
  facet_wrap(~name, ncol = 3) +
  scale_x_continuous(breaks = seq(0, 30, by = 2)) +
  theme_bw() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank()
  )
Warning: Removed 202363 rows containing non-finite outside the scale range
(`stat_bin()`).

Code
# Save the figure as "11_participant_ages_each_timepoint" (12 x 12) using the
# save_plot() helper, which is defined outside this chunk
# NOTE(review): presumably it saves the most recently drawn plot — confirm
save_plot("11_participant_ages_each_timepoint", width = 12, height = 12)
Warning: Removed 202363 rows containing non-finite outside the scale range
(`stat_bin()`).
Removed 202363 rows containing non-finite outside the scale range
(`stat_bin()`).

Education

Code
# Sanity check: the frequency table of `amohqual` equals the frequency table
# of `amohqualn` position by position (TRUE when every count matches)
all(table(df$amohqual)==table(df$amohqualn))
[1] TRUE
Code
# Frequency table of maternal education among rows with twin == 1
# (presumably one twin per pair, so each mother is counted once — confirm).
# Missing education is kept as its own row and is included in the percentages.
df %>%
  filter(twin == 1) %>%
  count(amohqual) %>%
  # Integer position of each factor level, placed in front of the level name
  mutate(numeric_code = as.numeric(amohqual), .before = amohqual) %>%
  mutate(percent = n / sum(n)) %>%
  gt() %>%
  cols_label(
    numeric_code = "Code",
    amohqual = "Education Level",
    n = "N",
    percent = "Percent"
  ) %>%
  fmt_percent(columns = percent)
Code Education Level N Percent
1 no qualifications 1208 9.28%
2 CSE grade 2-5 or O-level/GCSE grade D-G 2000 15.36%
3 CSE grade 1 or O-level/GCSE grade A-C 4779 36.71%
4 A-level or S-level 1706 13.10%
5 HNC 359 2.76%
6 HND 486 3.73%
7 undergraduate degree 1495 11.48%
8 postgraduate qualification 782 6.01%
NA NA 205 1.57%

Relationship between maternal education and child outcomes

In some analyses, maternal education is treated as a numeric variable (1-8), which assumes a linear relationship with child outcomes. To check whether this assumption is reasonable, we examine the predicted child education & cognition outcomes at each maternal education level.

We regress child outcomes on maternal education using dummy coding (i.e., treating each education level as a separate category). The plots below show the predicted outcome for each education level. If the linearity assumption holds, these points should fall approximately on a straight line.

We find that children of mothers with A levels have broadly similar outcomes to children of mothers with Higher National Certificate (HNC) or Higher National Diploma (HND) qualifications. The assumption of linearity therefore seems unreasonable across these levels, and it may be problematic to give A levels and the HNC/HND qualifications separate, equally spaced numeric codes.

Code
# Plot model predictions of an outcome at each maternal education level.
#
# model:   fitted model that has `amohqual` as a predictor
# y_label: title for the y axis
# Returns a ggplot: predictions from marginaleffects::predictions() grouped by
# `amohqual`, drawn as points joined by a line, with error bars spanning
# conf.low to conf.high.
plot_education_effects = function(model, y_label) {
  level_predictions = data.frame(marginaleffects::predictions(model, by = "amohqual"))
  level_predictions = rename(
    level_predictions,
    prediction = estimate,
    education_level = amohqual
  )

  # Education level names are long, so the x-axis text is rotated
  slanted_x_text = theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  )

  # group = 1 joins all levels with a single line
  ggplot(level_predictions, aes(x = education_level, y = prediction, group = 1)) +
    geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2) +
    geom_point(size = 2) +
    geom_line() +
    labs(x = "Maternal Education Level", y = y_label) +
    theme_bw() +
    slanted_x_text
}

KS3 Academic Achievement (Age 14)

Code
# KS3 achievement regressed on maternal education entered as a factor, so each
# education level gets its own coefficient. Only rows with twin == 1 and a
# non-missing education level are used.
model_ks3 = df %>%
  filter(twin == 1, !is.na(amohqual)) %>%
  lm(npks3tall1 ~ amohqual, data = .)

summary(model_ks3)

Call:
lm(formula = npks3tall1 ~ amohqual, data = .)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.8335 -0.4234  0.0387  0.4676  3.4857 

Coefficients:
                                                Estimate Std. Error t value
(Intercept)                                      5.23404    0.06752  77.516
amohqualCSE grade 2-5 or O-level/GCSE grade D-G  0.28023    0.08062   3.476
amohqualCSE grade 1 or O-level/GCSE grade A-C    0.53886    0.07145   7.541
amohqualA-level or S-level                       0.78125    0.07623  10.248
amohqualHNC                                      0.66686    0.10415   6.403
amohqualHND                                      0.66195    0.09346   7.082
amohqualundergraduate degree                     0.97547    0.07648  12.754
amohqualpostgraduate qualification               1.01003    0.08401  12.023
                                                Pr(>|t|)    
(Intercept)                                      < 2e-16 ***
amohqualCSE grade 2-5 or O-level/GCSE grade D-G 0.000517 ***
amohqualCSE grade 1 or O-level/GCSE grade A-C   6.32e-14 ***
amohqualA-level or S-level                       < 2e-16 ***
amohqualHNC                                     1.79e-10 ***
amohqualHND                                     1.80e-12 ***
amohqualundergraduate degree                     < 2e-16 ***
amohqualpostgraduate qualification               < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.7397 on 2693 degrees of freedom
  (10114 observations deleted due to missingness)
Multiple R-squared:  0.1102,    Adjusted R-squared:  0.1078 
F-statistic: 47.63 on 7 and 2693 DF,  p-value: < 2.2e-16
Code
# Predicted KS3 score at each maternal education level, from model_ks3
plot_education_effects(model_ks3, "Predicted KS3 Score")

Code
# Save the figure as "11_meducation_ks3_plot" (6 x 5) using the save_plot()
# helper, which is defined outside this chunk
# NOTE(review): presumably it saves the most recently drawn plot — confirm
save_plot("11_meducation_ks3_plot", width = 6, height = 5)

GCSE Core Subjects Grade (Age 16)

Code
# GCSE core-subject grade regressed on maternal education entered as a factor,
# so each education level gets its own coefficient. Only rows with twin == 1
# and a non-missing education level are used.
model_gcse = df %>%
  filter(twin == 1, !is.na(amohqual)) %>%
  lm(pcexgcsecoregrdm1 ~ amohqual, data = .)

summary(model_gcse)

Call:
lm(formula = pcexgcsecoregrdm1 ~ amohqual, data = .)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.8174 -0.6731  0.0569  0.8087  3.0915 

Coefficients:
                                                Estimate Std. Error t value
(Intercept)                                      7.90847    0.06435 122.894
amohqualCSE grade 2-5 or O-level/GCSE grade D-G  0.28280    0.07571   3.735
amohqualCSE grade 1 or O-level/GCSE grade A-C    0.76463    0.06818  11.214
amohqualA-level or S-level                       1.20897    0.07350  16.449
amohqualHNC                                      1.13466    0.10065  11.274
amohqualHND                                      1.21039    0.09213  13.137
amohqualundergraduate degree                     1.73733    0.07321  23.729
amohqualpostgraduate qualification               1.79211    0.08073  22.198
                                                Pr(>|t|)    
(Intercept)                                      < 2e-16 ***
amohqualCSE grade 2-5 or O-level/GCSE grade D-G 0.000189 ***
amohqualCSE grade 1 or O-level/GCSE grade A-C    < 2e-16 ***
amohqualA-level or S-level                       < 2e-16 ***
amohqualHNC                                      < 2e-16 ***
amohqualHND                                      < 2e-16 ***
amohqualundergraduate degree                     < 2e-16 ***
amohqualpostgraduate qualification               < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.105 on 6431 degrees of freedom
  (6376 observations deleted due to missingness)
Multiple R-squared:  0.1821,    Adjusted R-squared:  0.1812 
F-statistic: 204.5 on 7 and 6431 DF,  p-value: < 2.2e-16
Code
# Predicted GCSE grade at each maternal education level, from model_gcse
plot_education_effects(model_gcse, "Predicted GCSE Score")

Code
# Save the figure as "11_meducation_gcse_plot" (6 x 5) using the save_plot()
# helper, which is defined outside this chunk
# NOTE(review): presumably it saves the most recently drawn plot — confirm
save_plot("11_meducation_gcse_plot", width = 6, height = 5)

Cognitive Ability (Age 12)

Code
# Age-12 cognitive composite (lcg1) regressed on maternal education entered as
# a factor, so each education level gets its own coefficient. Only rows with
# twin == 1 and a non-missing education level are used.
model_cog = df %>%
  filter(twin == 1, !is.na(amohqual)) %>%
  lm(lcg1 ~ amohqual, data = .)

summary(model_cog)

Call:
lm(formula = lcg1 ~ amohqual, data = .)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.8039 -0.6214  0.0563  0.6778  2.6524 

Coefficients:
                                                Estimate Std. Error t value
(Intercept)                                     -0.54041    0.06525  -8.282
amohqualCSE grade 2-5 or O-level/GCSE grade D-G  0.23722    0.07851   3.022
amohqualCSE grade 1 or O-level/GCSE grade A-C    0.43448    0.06979   6.225
amohqualA-level or S-level                       0.67126    0.07566   8.872
amohqualHNC                                      0.58609    0.10728   5.463
amohqualHND                                      0.65015    0.09575   6.790
amohqualundergraduate degree                     0.92674    0.07491  12.372
amohqualpostgraduate qualification               0.93773    0.08317  11.275
                                                Pr(>|t|)    
(Intercept)                                      < 2e-16 ***
amohqualCSE grade 2-5 or O-level/GCSE grade D-G  0.00253 ** 
amohqualCSE grade 1 or O-level/GCSE grade A-C   5.28e-10 ***
amohqualA-level or S-level                       < 2e-16 ***
amohqualHNC                                     4.95e-08 ***
amohqualHND                                     1.28e-11 ***
amohqualundergraduate degree                     < 2e-16 ***
amohqualpostgraduate qualification               < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9634 on 4195 degrees of freedom
  (8612 observations deleted due to missingness)
Multiple R-squared:  0.07031,   Adjusted R-squared:  0.06876 
F-statistic: 45.32 on 7 and 4195 DF,  p-value: < 2.2e-16
Code
# Predicted cognitive score at each maternal education level, from model_cog
plot_education_effects(model_cog, "Predicted Cognitive Score (g)")

Code
# Save the figure as "11_meducation_cognitive_plot" (6 x 5) using the
# save_plot() helper, which is defined outside this chunk
# NOTE(review): presumably it saves the most recently drawn plot — confirm
save_plot("11_meducation_cognitive_plot", width = 6, height = 5)